# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy
from scrapy.loader.processors import TakeFirst,MapCompose,Compose
from w3lib.html import remove_tags

def list_2_dic(lis):
    """Pair up a flat ``[key, value, key, value, ...]`` sequence into a dict.

    Elements at even positions become keys, with any leading/trailing
    full-width colons (``：``) and spaces removed; the element that follows
    each key becomes its value, with surrounding whitespace removed. When a
    key occurs more than once, the last occurrence wins.

    Args:
        lis: Sequence of strings with an even number of elements.

    Returns:
        A dict mapping each cleaned key to its cleaned value; empty when
        ``lis`` is empty.

    Raises:
        ValueError: If ``lis`` holds an odd number of elements.
    """
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # under ``python -O``, which would silently drop the unpaired trailing
    # element. The 1-tuple keeps the formatting correct when ``lis`` is
    # itself a tuple.
    if len(lis) % 2 != 0:
        raise ValueError('奇数错误 %s' % (lis,))
    # Even slice -> keys, odd slice -> the value following each key.
    return {
        key.strip('： '): value.strip()
        for key, value in zip(lis[::2], lis[1::2])
    }

class TzjItem(scrapy.Item):
    """Item whose fields declare Item Loader input/output processors.

    Most fields use ``TakeFirst`` as their output processor, so a loader
    emits one value (the first non-empty one collected) instead of a list.
    Exceptions are noted next to the field.
    """

    # Underscore-prefixed fields; each keeps only the first collected value.
    _id = scrapy.Field(output_processor=TakeFirst())
    _status = scrapy.Field(output_processor=TakeFirst())
    _update = scrapy.Field(output_processor=TakeFirst())

    # HTML tags are stripped from every collected value before the first
    # one is selected.
    desc = scrapy.Field(
        input_processor=MapCompose(remove_tags),
        output_processor=TakeFirst(),
    )

    # No processors declared here, so the loader's defaults apply.
    fundinground = scrapy.Field()

    # The whole collected list is handed to list_2_dic, which pairs
    # alternating entries into a dict.
    contact = scrapy.Field(output_processor=Compose(list_2_dic))

    # Name variants.
    full_name = scrapy.Field(output_processor=TakeFirst())
    name = scrapy.Field(output_processor=TakeFirst())
    en_name = scrapy.Field(output_processor=TakeFirst())

    # Fields named in Chinese (English gloss of each identifier alongside).
    机构总部 = scrapy.Field(output_processor=TakeFirst())  # institution headquarters
    注册地点 = scrapy.Field(output_processor=TakeFirst())  # place of registration
    成立时间 = scrapy.Field(output_processor=TakeFirst())  # founding date
    所属行业 = scrapy.Field(output_processor=TakeFirst())  # industry
    官方网站 = scrapy.Field(output_processor=TakeFirst())  # official website
